{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.14142136, -0.98994949],\n",
       "       [-0.98994949,  0.14142136]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U, Sigma, VT = linalg.svd([[1, 1], [7, 7]])\n",
    "U"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([10.,  0.])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Sigma  # 对角矩阵，将对角线上的元素变为了数组形式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.70710678, -0.70710678],\n",
       "       [-0.70710678,  0.70710678]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "VT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadExData():\n",
    "    return [[1, 1, 1, 0, 0],\n",
    "           [2, 2, 2, 0, 0],\n",
    "           [1, 1, 1, 0, 0],\n",
    "           [5, 5, 5, 0, 0],\n",
    "           [1, 1, 0, 2, 2],\n",
    "           [0, 0, 0, 3, 3],\n",
    "           [0, 0, 0, 1, 1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.72140007e+00, 5.29397912e+00, 6.84226362e-01, 4.96619610e-16,\n",
       "       1.57294073e-16])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Data = loadExData()\n",
    "U, Sigma, VT = linalg.svd(Data)\n",
    "Sigma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,\n",
       "         -4.04082150e-16, -3.93673809e-16],\n",
       "        [ 2.00000000e+00,  2.00000000e+00,  2.00000000e+00,\n",
       "          3.23525928e-16,  3.58220398e-16],\n",
       "        [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,\n",
       "         -6.96599896e-16, -6.79252661e-16],\n",
       "        [ 5.00000000e+00,  5.00000000e+00,  5.00000000e+00,\n",
       "         -1.04950770e-16, -3.25260652e-17],\n",
       "        [ 1.00000000e+00,  1.00000000e+00, -2.22044605e-16,\n",
       "          2.00000000e+00,  2.00000000e+00],\n",
       "        [ 1.11022302e-16,  3.88578059e-16, -5.55111512e-16,\n",
       "          3.00000000e+00,  3.00000000e+00],\n",
       "        [ 4.16333634e-17,  1.31838984e-16, -2.08166817e-16,\n",
       "          1.00000000e+00,  1.00000000e+00]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 重构原始矩阵\n",
    "Sig3 = mat([[Sigma[0], 0, 0], [0, Sigma[1], 0], [0, 0, Sigma[2]]])\n",
    "U[:, :3] * Sig3 * VT[:3, :]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基于协同过滤的推荐引擎"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy import *\n",
    "from numpy import linalg as la"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 相似度计算，其中inA, inB都是列向量\n",
    "def ecludSim(inA, inB):\n",
    "    return 1.0 / (1.0 + la.norm(inA - inB))\n",
    "\n",
    "def pearsSim(inA, inB):\n",
    "    if len(inA) < 3:\n",
    "        return 1.0\n",
    "    return 0.5 + 0.5 * corrcoef(inA, inB, rowvar=0)[0][1]\n",
    "\n",
    "def cosSim(inA, inB):\n",
    "    num = float(inA.T * inB)\n",
    "    denom = la.norm(inA) * la.norm(inB)\n",
    "    return 0.5 + 0.5 * (num / denom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[1, 1, 1, 0, 0],\n",
       "        [2, 2, 2, 0, 0],\n",
       "        [1, 1, 1, 0, 0],\n",
       "        [5, 5, 5, 0, 0],\n",
       "        [1, 1, 0, 2, 2],\n",
       "        [0, 0, 0, 3, 3],\n",
       "        [0, 0, 0, 1, 1]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "myMat = mat(loadExData())\n",
    "myMat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.13367660240019172"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 欧氏距离计算相似度\n",
    "ecludSim(myMat[:, 0], myMat[:, 4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ecludSim(myMat[:, 0], myMat[:, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5472455591261534"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 余弦计算相似度\n",
    "cosSim(myMat[:, 0], myMat[:, 4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9999999999999999"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cosSim(myMat[:, 0], myMat[:, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.23768619407595826"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 皮尔逊计算相似度\n",
    "pearsSim(myMat[:, 0], myMat[:, 4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pearsSim(myMat[:, 0], myMat[:, 0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 示例：餐馆菜肴推荐引擎"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算在给定相似度计算方法的条件下，用户对物品的估计评分值\n",
    "# dataMat 行表示用户，列表示物品\n",
    "def standEst(dataMat, user, simMeas, item):\n",
    "    n = shape(dataMat)[1]  # 获取总物品个数\n",
    "    simTotal = 0.0\n",
    "    ratSimTotal = 0.0\n",
    "    for j in range(n):\n",
    "        userRating = dataMat[user, j]  # 该用户对物品j的评分\n",
    "        if userRating == 0:  # 未评分则跳过\n",
    "            continue\n",
    "        # 对物品item和j都评过分的用户编号\n",
    "        overLap = nonzero(logical_and(dataMat[:, item].A>0, dataMat[:, j].A>0))[0]\n",
    "        if len(overLap) == 0:  # 若不存在则这两个物品相似度为0\n",
    "            similarity = 0\n",
    "        else:\n",
    "            similarity = simMeas(dataMat[overLap, item], dataMat[overLap, j])\n",
    "        simTotal += similarity  # 计算总相似度\n",
    "        ratSimTotal += similarity * userRating  # 根据该用户评分来计算加权评分\n",
    "    # 最后返回对该用户于该物品估算的评分\n",
    "    if simTotal == 0:\n",
    "        return 0\n",
    "    else:\n",
    "        return ratSimTotal / simTotal\n",
    "    \n",
    "def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):\n",
    "    unratedItems = nonzero(dataMat[user, :].A == 0)[1]  # 获取该用户为评分的物品编号\n",
    "    if len(unratedItems) == 0:\n",
    "        return 'you rated everything'\n",
    "    itemScores = []\n",
    "    for item in unratedItems:\n",
    "        estimatedScore = estMethod(dataMat, user, simMeas, item)\n",
    "        itemScores.append((item, estimatedScore))\n",
    "    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[4, 4, 0, 2, 2],\n",
       "        [4, 0, 0, 3, 3],\n",
       "        [4, 0, 0, 1, 1],\n",
       "        [1, 1, 1, 2, 0],\n",
       "        [2, 2, 2, 0, 0],\n",
       "        [1, 1, 1, 0, 0],\n",
       "        [5, 5, 5, 0, 0]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "myMat = mat([[4, 4, 0, 2, 2],\n",
    "             [4, 0, 0, 3, 3],\n",
    "             [4, 0, 0, 1, 1],\n",
    "             [1, 1, 1, 2, 0],\n",
    "             [2, 2, 2, 0, 0],\n",
    "             [1, 1, 1, 0, 0],\n",
    "             [5, 5, 5, 0, 0]])\n",
    "myMat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2, 2.5), (1, 2.0243290220056256)]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommend(myMat, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2, 3.0), (1, 2.8266504712098603)]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommend(myMat, 2, simMeas=ecludSim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2, 2.5), (1, 2.0)]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommend(myMat, 2, simMeas=pearsSim)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 利用SVD提高推荐的效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy import linalg as la"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "myMat = mat([[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],\n",
    "             [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],\n",
    "             [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],\n",
    "             [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],\n",
    "             [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],\n",
    "             [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],\n",
    "             [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],\n",
    "             [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],\n",
    "             [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],\n",
    "             [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],\n",
    "             [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([15.77075346, 11.40670395, 11.03044558,  4.84639758,  3.09292055,\n",
       "        2.58097379,  1.00413543,  0.72817072,  0.43800353,  0.22082113,\n",
       "        0.07367823])"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U, Sigma, VT = la.svd(myMat)\n",
    "Sigma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "541.9999999999993"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看有多少个奇异值能达到总能量的90%\n",
    "Sig2 = Sigma**2\n",
    "sum(Sig2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "487.7999999999994"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(Sig2) * 0.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "500.5002891275791"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 前3个特征包含了90%的量\n",
    "sum(Sig2[:3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 基于SVD的评分估计\n",
    "def svdEst(dataMat, user, simMeas, item):\n",
    "    n = shape(dataMat)[1]\n",
    "    simTotal = 0.0\n",
    "    ratSimTotal = 0.0\n",
    "    \n",
    "    # 奇异值分解\n",
    "    nums = 4\n",
    "    U, Sigma, VT = la.svd(dataMat)\n",
    "    Sig4 = mat(eye(nums) * Sigma[:nums])  # 构建对角矩阵\n",
    "    xformedItems = dataMat.T * U[:, :nums] * Sig4.I  # 构建转化后的物品\n",
    "    \n",
    "    for j in range(n):\n",
    "        userRating = dataMat[user, j]\n",
    "        if userRating == 0 or j == item:\n",
    "            continue\n",
    "        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)\n",
    "#         print('the %d and %d similarity is %f' % (item, j, similarity))\n",
    "        simTotal += similarity\n",
    "        ratSimTotal += similarity * userRating\n",
    "    if simTotal == 0:\n",
    "        return 0\n",
    "    else:\n",
    "        return ratSimTotal / simTotal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(4, 3.344714938469228), (7, 3.329402072452697), (9, 3.328100876390069)]"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommend(myMat, 1, estMethod=svdEst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(4, 3.346952186702173), (9, 3.3353796573274703), (6, 3.3071930278130375)]"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommend(myMat, 1, estMethod=svdEst, simMeas=pearsSim)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基于SVD的图像压缩"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 输出图像\n",
    "def printMat(inMat, thresh=0.8):\n",
    "    for i in range(32):\n",
    "        for k in range(32):\n",
    "            if float(inMat[i, k]) > thresh:\n",
    "                print(1, end='')\n",
    "            else:\n",
    "                print(0, end='')\n",
    "        print('')\n",
    "\n",
    "def imgCompress(numSV=3, thresh=0.8):\n",
    "    # 读取原图像并输出\n",
    "    myl = []\n",
    "    for line in open('0_5.txt').readlines():\n",
    "        newRow = []\n",
    "        for i in range(32):\n",
    "            newRow.append(int(line[i]))\n",
    "        myl.append(newRow)\n",
    "    myMat = mat(myl)\n",
    "    print('****original matrix******')\n",
    "    printMat(myMat, thresh)\n",
    "    \n",
    "    # 奇异值分解图像\n",
    "    U, Sigma, VT = la.svd(myMat)\n",
    "    SigRecon = mat(zeros((numSV, numSV)))\n",
    "    for k in range(numSV):\n",
    "        SigRecon[k, k] = Sigma[k]\n",
    "    \n",
    "    # 重构图像并输出\n",
    "    reconMat = U[:, :numSV] * SigRecon * VT[:numSV, :]\n",
    "    print('****reconstructed matrix using %d sigular values******' % numSV)\n",
    "    printMat(reconMat, thresh)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "****original matrix******\n",
      "00000000000000110000000000000000\n",
      "00000000000011111100000000000000\n",
      "00000000000111111110000000000000\n",
      "00000000001111111111000000000000\n",
      "00000000111111111111100000000000\n",
      "00000001111111111111110000000000\n",
      "00000000111111111111111000000000\n",
      "00000000111111100001111100000000\n",
      "00000001111111000001111100000000\n",
      "00000011111100000000111100000000\n",
      "00000011111100000000111110000000\n",
      "00000011111100000000011110000000\n",
      "00000011111100000000011110000000\n",
      "00000001111110000000001111000000\n",
      "00000011111110000000001111000000\n",
      "00000011111100000000001111000000\n",
      "00000001111100000000001111000000\n",
      "00000011111100000000001111000000\n",
      "00000001111100000000001111000000\n",
      "00000001111100000000011111000000\n",
      "00000000111110000000001111100000\n",
      "00000000111110000000001111100000\n",
      "00000000111110000000001111100000\n",
      "00000000111110000000011111000000\n",
      "00000000111110000000111111000000\n",
      "00000000111111000001111110000000\n",
      "00000000011111111111111110000000\n",
      "00000000001111111111111110000000\n",
      "00000000001111111111111110000000\n",
      "00000000000111111111111000000000\n",
      "00000000000011111111110000000000\n",
      "00000000000000111111000000000000\n",
      "****reconstructed matrix using 2 sigular values******\n",
      "00000000000000000000000000000000\n",
      "00000000000000000000000000000000\n",
      "00000000000001111100000000000000\n",
      "00000000000011111111000000000000\n",
      "00000000000111111111100000000000\n",
      "00000000001111111111110000000000\n",
      "00000000001111111111110000000000\n",
      "00000000011110000000001000000000\n",
      "00000000111100000000001100000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001110000000\n",
      "00000000111100000000001100000000\n",
      "00000000001111111111111000000000\n",
      "00000000001111111111110000000000\n",
      "00000000001111111111110000000000\n",
      "00000000000011111111100000000000\n",
      "00000000000011111111000000000000\n",
      "00000000000000000000000000000000\n"
     ]
    }
   ],
   "source": [
    "imgCompress(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
